# Read the BOCC results stored under Results/BOCC_Results.
bocc <- read_BOCCResults(path = "Results/BOCC_Results")

# Names of the statistics requested from create_stats_df() below.
stats2plot <- c(
  "cluster_size",
  "gene_ratio",
  "HPO_ratio",
  "num_sig_go_enrichment_terms",
  "max_norm_cell_type_specificity",
  "max_norm_cell_type_comma_sep_string",
  "max_norm_disease_specificity",
  "max_norm_disease_comma_sep_string"
)

# Build the statistics table, then round the two ratio columns to three
# decimal places.
df <- bocc %>%
  create_stats_df(stats2plot) %>%
  dplyr::mutate(
    dplyr::across(
      c("gene_ratio", "HPO_ratio"),
      function(value) round(value, 3)
    )
  )
# Per-method summary of cluster_size: row count, minimum, mean, standard
# deviation, maximum, and total. The result is printed, not stored.
df %>%
  dplyr::group_by(method) %>%
  dplyr::summarize(
    n = dplyr::n(),
    min_m = min(cluster_size),
    mean_m = mean(cluster_size),
    sd_m = sd(cluster_size),
    max_m = max(cluster_size),
    sum_m = sum(cluster_size)
  )
## # A tibble: 4 x 7
## method n min_m mean_m sd_m max_m sum_m
## * <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 cesna_june_22_2021 10 15 71 57.6 201 710
## 2 greedy_june_22_2021 5 3 4274. 5834. 11161 21369
## 3 infomap_june_22_2021 64 2 24.9 53.7 378 1595
## 4 walktrap_june_22_2021 150 1 147. 917. 8821 22088
# %>%
# kableExtra::kable(.,
# format = 'latex',
# digits = 2,
# booktabs = TRUE) %>%
# kableExtra::kable_styling(position = 'center') %>%
# kableExtra::add_header_above(., list(' ' = 2, 'cluster size' = 5))
On a natural scale…
# Scatter plot of cell type specificity against disease specificity, one point
# per row of df with cluster_size > 1, colored by method.
p1 <- ggplot(
  df %>%
    dplyr::filter(cluster_size > 1),
  aes(
    x = max_norm_cell_type_specificity,
    y = max_norm_disease_specificity,
    color = method,
    # The label* aesthetics are not drawn by geom_point(); presumably they are
    # carried along so plotly::ggplotly(p1) can show them in hover tooltips --
    # confirm.
    label = cluster_id,
    label2 = cluster_size,
    label3 = num_sig_go_enrichment_terms,
    label4 = gene_ratio,
    label5 = HPO_ratio,
    label6 = max_norm_cell_type_comma_sep_string,
    label7 = max_norm_disease_comma_sep_string
  )
) +
  # Jitter is random and unseeded, so point positions differ between renders.
  geom_point(alpha = 0.5, position = "jitter") +
  scale_color_brewer("method", palette = "Set1") +
  labs(x = "cell type specificity", y = "disease type specificity") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box = "vertical"
  )
p1

# Pass the plot explicitly: without `plot =`, ggsave() writes whatever
# last_plot() returns, which silently changes if another plot is created or
# displayed between the lines above and this call.
ggsave(
  "AnalyzeResults/figs/bocc_results.png",
  plot = p1,
  width = 5,
  height = 5
)
On a log-log scale…
# Same plot as p1 with both axes on a log10 scale: major breaks at powers of
# ten labelled as 10^x, minor breaks from log10_minor_break(), and log tick
# marks drawn along the axes.
p2 <- p1 +
  scale_x_log10(
    bquote(~log[10]('cell specificity')),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    minor_breaks = log10_minor_break(),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  scale_y_log10(
    bquote(~log[10]('disease specificity')),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    minor_breaks = log10_minor_break(),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  annotation_logticks()
p2

# Pass the plot explicitly: without `plot =`, ggsave() writes whatever
# last_plot() returns, which is fragile if plots are reordered or added.
ggsave(
  "AnalyzeResults/figs/bocc_results_loglog.png",
  plot = p2,
  width = 5,
  height = 5
)
As a widget…
# Re-apply the bottom legend position to p1, then render it as an interactive
# plotly widget.
p1 <- p1 + theme(legend.position = "bottom")
plotly::ggplotly(p = p1)
We’ll look into the communities with high specificity (\(> 0.50\)).
# Keep rows with cluster_size > 1 whose cell type or disease specificity
# exceeds 0.5, and move the two specificity columns directly after the
# method:cluster_size columns.
deep_dive <- df %>%
  dplyr::filter(
    cluster_size > 1,
    max_norm_cell_type_specificity > 0.5 | max_norm_disease_specificity > 0.5
  ) %>%
  dplyr::select(
    method:cluster_size,
    max_norm_cell_type_specificity,
    max_norm_disease_specificity,
    dplyr::everything()
  )

# Show the selection as an interactive table, five rows per page.
DT::datatable(deep_dive, options = list(pageLength = 5))
I’ll be investigating the clusters identified by cluster_id 17, 18, 41, 84.
Starting with cluster 17:
# Summarize cluster 17 of the walktrap method in graph G, with plotting
# enabled.
summarize_cluster(
  G = G,
  method = "walktrap",
  cluster_id = 17,
  plot = TRUE
)
## $sG
## IGRAPH a4cd54a UN-- 6 5 --
## + attr: name (v/c), node (v/n), gene (v/n), communities (v/x)
## + edges from a4cd54a (vertex names):
## [1] ADGRG1 --HP:0040194 ADGRG1 --HP:0011147 ADGRG1 --HP:0007095
## [4] ADGRG1 --HP:0005684 HP:0002803--HP:0005684
##
## $n
## [1] 6
##
## $m
## [1] 5
I’ll be investigating the clusters identified by cluster_id 5, 17, 30, 37, 38, 42, 46, 49, 52, 53, 54, 57.